inst/Projects/Dive Survey/combinedDiveAnalysis.R

### COMBINED DIVE ANALYSIS ###
require(lubridate)
require(ggplot2)
require(ggspatial)
require(bio.lobster)
require(bio.utilities)
require(dplyr)
require(nlme)
require(glmmTMB)
require(DHARMa)
require(data.table)


setwd("C:/Users/HowseVJ/OneDrive - DFO-MPO/LFA 35-38 Framework Resources/Figures")

diveH = read.csv("C:/Users/HowseVJ/Documents/bio.data/bio.lobster/data/divesurvey/BOFHistoric/OD_DiveFormatted.csv")

diveH <- diveH %>% filter(LFA != 35)

sum_diveH<- diveH %>%
  group_by(transect) %>%
  summarize(
    total_lobster = sum(lobster, na.rm = TRUE),
    year = first(year),
    month = first(month),
    LFA = first(LFA),
    region=first(region),
    transect_length = first(transectlength),
    transect_width = first(transectwidth)
  )

sum_diveH$areasamp = (sum_diveH$transect_length*sum_diveH$transect_width)

sum_diveH <- sum_diveH %>% rename(tid = transect)
sum_diveH$tid<-as.character(sum_diveH$tid)
sum_diveH<-as.data.frame(sum_diveH)



diveC = read.csv("C:/Users/HowseVJ/Documents/bio.data/bio.lobster/data/divesurvey/BOF_DiveFormatted.csv")
diveC$LFA = 38
diveC$region = "Grand Manan"

sum_diveC<- diveC %>%
  group_by(tid) %>%
  summarize(
    total_lobster = sum(lobster, na.rm = TRUE),
    year = first(year),
    month = first(month),
    LFA = first(LFA),
    region=first(region),
    transect_length = first(transect_length),
    transect_width = first(transect_width)
  )

sum_diveC$areasamp = (sum_diveC$transect_length*sum_diveC$transect_width)

sum_diveC<-as.data.frame(sum_diveC)

combined_dive <- bind_rows(sum_diveH, sum_diveC)

#combined_dive$lobster_density <- (combined_dive$total_lobster / combined_dive$areasamp)*100  ## get density of lobster


#############################################

## Data is aggregated at the Transect level ##
####### negative binomial with lobster count and an offset for area... GLMM ####
nb_1 <- glmmTMB(total_lobster ~ year + 
                      month + 
                      LFA + 
                    offset(log(areasamp)), 
                    data = combined_dive, 
                    family = nbinom2())
summary(nb_1)



nb_2 <- glmmTMB(total_lobster ~ year + 
                  month + 
                  offset(log(areasamp)), 
                data = combined_dive, 
                family = nbinom2())
summary(nb_2)


nb_3<- glmmTMB(total_lobster ~ year * month + 
                                  LFA + 
                                  offset(log(areasamp)), 
                                data = combined_dive, 
                                family = nbinom2())
summary(nb_3)


##redo with a predict for september


nb_3<- glmmTMB(total_lobster ~ year | month +  # example of random effect of month
                 LFA + 
                 offset(log(areasamp)), 
               data = combined_dive, 
               family = nbinom2())
summary(nb_3)


## Get residuals in glmm models
res1model<-residuals(nb_1, type = "response")
combined_dive$residuals_1<-res1model
plot(y=combined_dive$residuals_1, x=combined_dive$year)


res2model<-residuals(nb_2, type = "response")
combined_dive$residuals_2<-res2model
plot(y=combined_dive$residuals_2, x=combined_dive$year)


res3model<-residuals(nb_3, type = "response")
combined_dive$residuals_3<-res2model
plot(y=combined_dive$residuals_3, x=combined_dive$year)

#### High variation especially around 200-

ggplot(aes(x = factor(year), y = residuals_1),data=combined_dive) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)

ggplot(aes(x = factor(year), y = residuals_2), data = combined_dive) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)

ggplot(aes(x = factor(year), y = residuals_3), data = combined_dive) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)

## AIC AND BIC
# Extract AIC and BIC values
aic_nb1 <- AIC(nb_1)
bic_nb1 <- BIC(nb_1)

aic_nb2 <- AIC(nb_2)
bic_nb2 <- BIC(nb_2)

aic_nb3 <- AIC(nb_3)
bic_nb3 <- BIC(nb_3)

# Print AIC and BIC values
cat("Model 1 - AIC:", aic_nb1, "BIC:", bic_nb1, "\n")
cat("Model 2 - AIC:", aic_nb2, "BIC:", bic_nb2, "\n")
cat("Model 3 - AIC:", aic_nb3, "BIC:", bic_nb3, "\n")

# Residual plots
par(mfrow = c(3, 1)) 
plot(residuals(nb_1), main = "Residuals Plot - Model 1", ylab = "Residuals", xlab = "Index")
plot(residuals(nb_2), main = "Residuals Plot - Model 2", ylab = "Residuals", xlab = "Index")
plot(residuals(nb_3), main = "Residuals Plot - Model 3", ylab = "Residuals", xlab = "Index")

anova(nb_1, nb_2, nb_3)
### summary Model 2 the simple one is the best fit


sp<-predict(nb_1,combined_dive,type="response")
sum_combined<-combined_dive
sum_combined$pred<-sp

ggplot(sum_combined, aes(x=factor(month), y=pred)) +geom_boxplot()

### predicted number of lobster by month by year -- glmttmb is considering effort by offset
## used glmmtimb to see if month and year effects are important




## Data is aggregated at the month of sampling level ##


 dive_monthly <- combined_dive %>%
   group_by(year, month, LFA) %>%
   summarise(
     tid = n_distinct(tid),
     total_lobster = sum(total_lobster),
     areasamp_month = sum(areasamp)
   ) %>%
   ungroup()


#year*month+LFA+(1|monthly sept area)   total swept area per month would be offset 
nb_A <- glmmTMB(total_lobster ~ year +
                  month+
                  LFA +
                  offset(log(areasamp_month)), 
                data = dive_monthly, 
                family = nbinom2())
summary(nb_A)

nb_B<- glmmTMB(total_lobster  ~ year + 
                 LFA+
                 offset(log(areasamp_month)), 
               data = dive_monthly, 
               family = nbinom2())
summary(nb_B)

nb_C<- glmmTMB(total_lobster  ~ year + 
                 offset(log(areasamp_month)), 
               data = dive_monthly, 
               family = nbinom2())
summary(nb_C)



nb_D<- glmmTMB(total_lobster ~ year * month + 
                 LFA + 
                 offset(log(areasamp_month)), 
               data = dive_monthly, 
               family = nbinom2())
summary(nb_D)


## Get residuals in glmm models
resAmodel<-residuals(nb_A, type = "response")
dive_monthly$residuals_A<-resAmodel
plot(y=dive_monthly$residuals_A, x=dive_monthly$year)


resBmodel<-residuals(nb_B, type = "response")
dive_monthly$residuals_B<-resBmodel
plot(y=dive_monthly$residuals_B, x=dive_monthly$year)


resCmodel<-residuals(nb_C, type = "response")
dive_monthly$residuals_C<-resCmodel
plot(y=dive_monthly$residuals_C, x=dive_monthly$year)

resDmodel<-residuals(nb_D, type = "response")
dive_monthly$residuals_D<-resDmodel
plot(y=dive_monthly$residuals_D, x=dive_monthly$year)



ggplot(aes(x = factor(year), y = residuals_A),data=dive_monthly) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)

ggplot(aes(x = factor(year), y = residuals_B), data = dive_monthly) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)

ggplot(aes(x = factor(year), y = residuals_C), data = dive_monthly) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)


ggplot(aes(x = factor(year), y = residuals_D), data = dive_monthly) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)

## AIC AND BIC
# Extract AIC and BIC values
aic_nbA <- AIC(nb_A)
bic_nbA <- BIC(nb_A)

aic_nbB <- AIC(nb_B)
bic_nbB <- BIC(nb_B)

aic_nbC <- AIC(nb_C)
bic_nbC <- BIC(nb_C)

aic_nbD <- AIC(nb_D)
bic_nbD <- BIC(nb_D)
# Print AIC and BIC values
cat("Model A - AIC:", aic_nbA, "BIC:", bic_nbA, "\n")
cat("Model B - AIC:", aic_nbB, "BIC:", bic_nbB, "\n")
cat("Model C - AIC:", aic_nbC, "BIC:", bic_nbC, "\n")
cat("Model D - AIC:", aic_nbD, "BIC:", bic_nbD, "\n")
# Residual plots
par(mfrow = c(2, 2)) 
plot(residuals(nb_A), main = "Residuals Plot - Model A", ylab = "Residuals", xlab = "Index")
plot(residuals(nb_B), main = "Residuals Plot - Model B", ylab = "Residuals", xlab = "Index")
plot(residuals(nb_C), main = "Residuals Plot - Model C", ylab = "Residuals", xlab = "Index")
plot(residuals(nb_D), main = "Residuals Plot - Model D", ylab = "Residuals", xlab = "Index")

anova(nb_A, nb_B, nb_C, nb_D)

##Summary: Model C fits best... is the simplest 

### Data is subset to only September because that's when most consistent sampling was done ##

sept_dive <- dive_monthly %>%
  filter(month == 9)
sept_dive <- sept_dive %>%
  select(-residuals_A, -residuals_B, -residuals_C, -residuals_D)


nb_11 <- glmmTMB(total_lobster ~ year +
                   LFA+
                  offset(log(areasamp_month)), 
                data = sept_dive, 
                family = nbinom2())
summary(nb_11)

nb_22 <- glmmTMB(total_lobster ~ year +
                   offset(log(areasamp_month)), 
                 data = sept_dive, 
                 family = nbinom2())
summary(nb_22)

## Get residuals in glmm models
res11model<-residuals(nb_11, type = "response")
sept_dive$residuals_11<-res11model
plot(y=sept_dive$residuals_11, x=sept_dive$year)

res22model<-residuals(nb_22, type = "response")
sept_dive$residuals_22<-res22model
plot(y=sept_dive$residuals_22, x=sept_dive$year)


ggplot(aes(x = factor(year), y = residuals_11),data=sept_dive) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)


ggplot(aes(x = factor(year), y = residuals_22),data=sept_dive) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal() +
  facet_wrap(~ LFA)


## QQ plot check
sim_res <- simulateResiduals(fittedModel = nb_22)
plotQQunif(sim_res)


## AIC AND BIC
# Extract AIC and BIC values
aic_nb11 <- AIC(nb_11)
bic_nb11 <- BIC(nb_11)

aic_nb22 <- AIC(nb_22)
bic_nb22 <- BIC(nb_22)


# Print AIC and BIC values
cat("Model 11 - AIC:", aic_nb11, "BIC:", bic_nb11, "\n")
cat("Model 22 - AIC:", aic_nb22, "BIC:", bic_nb22, "\n")

# Residual plots
plot(residuals(nb_11), main = "Residuals Plot - Model 11", ylab = "Residuals", xlab = "Index")

plot(residuals(nb_22), main = "Residuals Plot - Model 22", ylab = "Residuals", xlab = "Index")


anova(nb_11, nb_22)
### Summary: LFA doesn't really make a difference


sp<-predict(nb_22,sept_dive,type="response")
sum_combined<-sept_dive
sum_combined$pred<-sp

ggplot(sum_combined, aes(x=factor(year), y=pred)) +geom_point()

##################################

sept38<- dive_monthly %>%
  filter(month == 9)%>%
  filter(LFA ==38)
sept38 <- sept38 %>%
  select(-residuals_A, -residuals_B, -residuals_C, -residuals_D)



nb_55 <- glmmTMB(total_lobster ~ year +
                   offset(log(areasamp_month)), 
                 data = sept38, 
                 family = nbinom2())
summary(nb_55)

## Get residuals in glmm models
res55model<-residuals(nb_55, type = "response")
sept38$residuals_55<-res55model
plot(y=sept38$residuals_55, x=sept38$year)

ggplot(aes(x = factor(year), y = residuals_55),data=sept38) +
  geom_boxplot() +
  labs(x = "Year", y = "Residuals") +
  theme_minimal()


## QQ plot check
sim_res <- simulateResiduals(fittedModel = nb_55)
plotQQunif(sim_res)


## AIC AND BIC
# Extract AIC and BIC values
aic_nb55 <- AIC(nb_55)
bic_nb55 <- BIC(nb_55)

# Print AIC and BIC values
cat("Model 55 - AIC:", aic_nb55, "BIC:", bic_nb55, "\n")

# Residual plots
plot(residuals(nb_55), main = "Residuals Plot - Model 55", ylab = "Residuals", xlab = "Index")


































## Data.table version to aggregate data by year and month
DMonth <- as.data.table(combined_dive)
DMonth<-dive_monthly[,.(no_trans = length(unique(tid)),tot_area = sum(areasamp),tot_lob=sum(total_lobster)),by = .(year, month,LFA)]


  ggplot()+
    geom_boxplot(aes(x=month,y=no_trans), data=DMonth)



dive_year<-dive_monthly%>%
  group_by(year)%>%
  summarise(
    tid = n_distinct(tid),
    total_lobster = sum(total_lobster),
    areasamp_month = sum(areasamp_month) ) %>%
  ungroup()




###NOTE: Check summarized data esp dplyr
### need to show why sept is special - sampling effort mostly - longer time series 
### want total lobster taking into account the offset
##predicted values for each dataframe
## number of transect per year
## Changes in sampling regime 
## massive spike largely due to sampling methods
##  year and month are important and LFA is different
##


# read JARA use ?
### Try Ar1() option for autoregression Order
#When to Use AR(1):
 # Temporal Data: When you have repeated measures over time and expect that observations closer in time are more correlated.
#Spatial Data: When you have spatial data and expect that observations closer in space are more correlated.
#Benefits:
#  Improves Model Fit: By accounting for correlations in the data, the model can provide more accurate estimates.
#Reduces Bias: Ignoring correlations can lead to biased parameter estimates and incorrect inferences.



# Convert relevant columns to numeric
#combined_dive$total_lobster <- as.numeric(combined_dive$total_lobster)
#combined_dive$areasamp <- as.numeric(combined_dive$areasamp)

#numeric_agg <- aggregate(cbind(total_lobster, areasamp) ~ year + month + LFA, data = combined_dive, sum)
#tid_agg <- aggregate(tid ~ year + month + LFA, data = combined_dive, function(x) length(unique(x)))
#dive_monthly <- merge(numeric_agg, tid_agg, by = c("year", "month", "LFA"))
LobsterScience/bio.lobster documentation built on Feb. 14, 2025, 3:28 p.m.